require(hierfstat)
require(PopGenReport)
require(poppr)
require(genepop)
require(graph4lg)
require(related)
require(adegenet)
require(knitr)
require(tidyverse)
require(magrittr)

Readme

This is document is an R notebook. If you’d like view to pre-rendered figures, read a summary of analysis and interact with code, please open the relevant html file in a browser. Alternatively the notebook is published at https://rpubs.com/david_dayan/coquille_2021

To conduct a similar analyses on your computer, edit or run code: clone this repository into a directory on your local machine and open the .Rproj file in Rstudio. Files are stored on the github repository here: https://github.com/david-dayan/coquille_river_2021

Rationale / Brief Methods

The goal of this notebook is to assess if any genetic structure/differentiation can be observed between North and South Fork Coquille River O. mykiss

We conduct several brief analyses:

  • PCA
  • DAPC
  • Estimate Fst
  • Summarize Variation at Known Run-Timing Markers

Data Summary

91 individuals were genotyped at 391 GTseq genetic markers including presumably neutral and putatively adaptive genetic markers. Sample sizes and summary metadata for the the unfiltered data is below. All samples are described at winter steelhead

sheet1 <- readxl::read_xlsx("metadata/GT-seq_GC3F-CKF-005_metadata.xlsx", sheet = 1)
sheet2 <- readxl::read_xlsx("metadata/GT-seq_GC3F-CKF-005_metadata.xlsx", sheet = 2)

meta_data <- sheet1 %>%
  bind_rows(sheet2) %>%
  select(-1) %>%
  mutate(pop = str_sub(`SFGL Id`, 8, 11))

kable(meta_data %>%
  group_by(pop, `Hat/Wild`) %>%
  summarise(n = n()) )
pop Hat/Wild n
NFCQ Hatchery 15
NFCQ Wild 26
SFCQ Hatchery 22
SFCQ Wild 28
kable(meta_data %>%
  group_by(pop, Date) %>%
  summarise(n = n()) )
pop Date n
NFCQ 2016-03-30 15
NFCQ 2016-07-11 10
NFCQ 2016-07-13 16
SFCQ 2016-02-17 7
SFCQ 2016-03-01 10
SFCQ 2016-03-30 12
SFCQ 2016-07-12 21
kable(meta_data %>%
  group_by(pop, `Adult/Juv`) %>%
  summarise(n = n()) )
pop Adult/Juv n
NFCQ Juvenile 41
SFCQ Adult 17
SFCQ Juvenile 33
rm(sheet1)
rm(sheet2)

After filtering the GTseq dataset for genotype quality 71 individuals genotyped at 348 markers remained. Full details of the genotype calling and filtering is available in the notebook here. This information is also available in the project github repository. Summary information is below.

load("genotype_data/genind_2.0.R")
load("genotype_data/genotypes_2.2.R")

genos_2.2 %<>%
  left_join(select(meta_data, `Adult/Juv`, `SFGL Id`), by = c("sample" = "SFGL Id")) %>%
  relocate(sample, `Adult/Juv`)

kable(genos_2.2 %>%
  group_by(pop, `Hat/Wild`) %>%
  summarise(n = n()) )
pop Hat/Wild n
NFCQ Hatchery 13
NFCQ Wild 21
SFCQ Hatchery 19
SFCQ Wild 18
kable(genos_2.2 %>%
  group_by(pop, Date) %>%
  summarise(n = n()) )
pop Date n
NFCQ 2016-03-30 13
NFCQ 2016-07-11 9
NFCQ 2016-07-13 12
SFCQ 2016-02-17 5
SFCQ 2016-03-01 7
SFCQ 2016-03-30 11
SFCQ 2016-07-12 14
kable(genos_2.2 %>%
  group_by(pop, `Adult/Juv`) %>%
  summarise(n = n()) )
pop Adult/Juv n
NFCQ Juvenile 34
SFCQ Adult 12
SFCQ Juvenile 25

Lost samples An unusually large number of individuals failed genotyping. Metadata from these failed individuals is below. Most (15 of 20) were filtered because of very low on-target read depth. Three additional individuals were filtered due to moderately poor read depth/genotyping success rate and two were removed because of possible contamination. Filtered individuals were not enriched for a particular metadata variable (e.g. all of the filtered individuals were not adult hatchery samples or juvenile NFCQ YoY). Further details also available in genotyping log.

kable(meta_data %>%
  filter(!(`SFGL Id` %in% genos_2.2$sample)) %>%
  select(-c(Pedigree, `Vial #`)))
Date Hat/Wild Adult/Juv Sex Est. Age SFGL Id pop
2016-03-30 Hatchery Juvenile NA 1 OmyJC16NFCQ_0042 NFCQ
2016-03-30 Hatchery Juvenile NA 1 OmyJC16NFCQ_0011 NFCQ
2016-07-11 Wild Juvenile NA 1+ OmyJC16NFCQ_0044 NFCQ
2016-07-13 Wild Juvenile NA YoY OmyJC16NFCQ_0037 NFCQ
2016-07-13 Wild Juvenile NA YoY OmyJC16NFCQ_0018 NFCQ
2016-07-13 Wild Juvenile NA YoY OmyJC16NFCQ_0033 NFCQ
2016-07-13 Wild Juvenile NA 1+ OmyJC16NFCQ_0008 NFCQ
2016-02-17 Wild Adult M NA OmyAC16SFCQ_0005 SFCQ
2016-02-17 Wild Adult M NA OmyAC16SFCQ_0006 SFCQ
2016-03-01 Wild Adult M NA OmyAC16SFCQ_0009 SFCQ
2016-03-01 Hatchery Adult M NA OmyAC16SFCQ_0016 SFCQ
2016-03-01 Hatchery Adult M NA OmyAC16SFCQ_0017 SFCQ
2016-03-30 Hatchery Juvenile NA 1 OmyJC16SFCQ_0024 SFCQ
2016-07-12 Wild Juvenile NA 1+ OmyJC16SFCQ_0027 SFCQ
2016-07-12 Wild Juvenile NA YoY OmyJC16SFCQ_0043 SFCQ
2016-07-12 Wild Juvenile NA 1+ OmyJC16SFCQ_0038 SFCQ
2016-07-12 Wild Juvenile NA YoY OmyJC16SFCQ_0034 SFCQ
2016-07-12 Wild Juvenile NA 1+ OmyJC16SFCQ_0026 SFCQ
2016-07-12 Wild Juvenile NA YoY OmyJC16SFCQ_0045 SFCQ
2016-07-12 Wild Juvenile NA YoY OmyJC16SFCQ_0041 SFCQ

PCA

First, we conduct a principal component analysis.

The first step is to assess the number of PCs to retain in the analysis. We will do this using the Kaiser Guttman criterion (below) and a broken stick model (below)

# set missing data to mean allele freq (PCA does not accomodate NAs)
X <- scaleGen(genind_2.0,  NA.method="mean")


#then run pca, keep all PCs
pca1 <- dudi.pca(X, scale = FALSE, scannf = FALSE, nf = 71)

### check pcs to keep with kaiser-guttman and broken stick

#kaiser guttman
cutoff<-mean(pca1$eig)
kg <- length((pca1$eig)[(pca1$eig)>cutoff])
barplot(pca1$eig, main = "PCA eigenvalues\nKaiser-Guttman Criteria (red line)")
abline(h = cutoff, col = "red")

#broken stick
n <- length(pca1$eig)
bsm <- data.frame(j=seq(1:n), p = 0)
bsm$p[1] <- 1/n
for (i in 2:n){
  bsm$p[i] <- bsm$p[i-1]+(1/(n+1-i))
  
}
bsm$p <- 100*bsm$p/n

pca_eigs_to_plot <- as.data.frame(cbind(100*pca1$eig/sum(pca1$eig)), rev(bsm$p))
pca_eigs_to_plot %<>%
  rownames_to_column(var = "bsm") %>%
  rename(pca_eig_perc = V1) %>%
  mutate(pca_eig_perc = as.numeric(pca_eig_perc))

pca_eigs_to_plot %<>%
  rowid_to_column("row_n") %>%
  mutate(bsm = as.numeric(bsm)) %>%
  pivot_longer(!row_n, names_to = "bsm_or_eig", values_to = "percent_variance")

ggplot(data = pca_eigs_to_plot[1:71,])+geom_bar(aes(x = as.factor(row_n), y = percent_variance, color = bsm_or_eig, fill = bsm_or_eig), stat = "identity", position=position_dodge())+theme_classic()+xlab("Eigenvector")+ylab("percent of variance")+ggtitle("Broken Stick Model")

The Kaiser-Guttman criterion suggests retaining variation at the first 30 PCs, while the broken stick model suggests no PCs are relevant.

PCA Results

We plot the first 4 PC axes below according to population and hatchery vs. natural origin (HOR/NOR)

North vs South Fork

#kept all PCs
snp_pcs <- pca1$li#[,c(1:kg)]

#now plot data
snp_pcs %<>%
  rownames_to_column("sample") %>%
  left_join(select(genos_2.2, sample, pop, `Hat/Wild`))

ggplot(data = snp_pcs)+geom_point(aes(Axis1, Axis2, color = pop)) + stat_ellipse(aes(Axis1, Axis2, color = pop)) +theme_classic()+scale_color_viridis_d(name = "North or South Fork")

ggplot(data = snp_pcs)+geom_point(aes(Axis3, Axis4, color = pop)) + stat_ellipse(aes(Axis1, Axis2, color = pop)) +theme_classic()+scale_color_viridis_d(name = "North or South Fork")

#3d plot as well
plotly::plot_ly(x=snp_pcs$Axis1, y=snp_pcs$Axis2, z=snp_pcs$Axis3, type="scatter3d", mode="markers", color=snp_pcs$pop, alpha = 0.8)

There is no apparent genetic structure in principal component space between North and South Fork samples.

HOR vs NOR
Now lets’s also add HOR/NOR status

ggplot(data = snp_pcs)+geom_point(aes(Axis1, Axis2, color = `Hat/Wild`)) + stat_ellipse(aes(Axis1, Axis2, color = `Hat/Wild`)) +theme_classic()+scale_color_viridis_d(name = "HOR or NOR", begin = 0.2, end = 0.8)

ggplot(data = snp_pcs)+geom_point(aes(Axis3, Axis4, color = `Hat/Wild`)) + stat_ellipse(aes(Axis1, Axis2, color =`Hat/Wild`)) +theme_classic()+scale_color_viridis_d(name = "HOR or NOR", begin = 0.2, end = 0.8)

#3d plot as well
plotly::plot_ly(x=snp_pcs$Axis1, y=snp_pcs$Axis2, z=snp_pcs$Axis3, type="scatter3d", mode="markers", color=snp_pcs$`Hat/Wild`, alpha = 0.8, colors = viridis::viridis(2, begin = 0.2, end = 0.8))

Also no apparent structure in PC space between hatchery and wild.

DAPC

Set Up

Sometimes PCA will fail to find structure when both FST and the number of markers is low. Next we will attempt find a combination of alleles in the dataset that maximizes differences between North and South Fork samples using discriminant analysis of principal components (DAPC)

First we need to asses the correct number of PCs to retain in the DAPC, including too many can lead to overfitting and observation of among-group differences that are unlikely to be biologically meaningful. Below we use cross validation and the a.score approach to find the optimum number of PCs while avoiding overfitting.

# run this interactively to find the optimum number of PCs without overfitting

# first fit a DAPC and create the other dataframe needed to run a cross validation
dapc_full <- dapc(genind_2.0, n.pca = 71, n.da = 1)

mat <- as.matrix(scaleGen(genind_2.0, NA.method="mean", scale=FALSE, center=FALSE))
xpop <- pop(genind_2.0)
xval <- xvalDapc(mat, xpop, n.pca.max = 71, training.set = 0.9, result = "overall", center = TRUE, scale = FALSE, n.pca = seq(1,71, length.out = 71), n.rep = 50, xval.plot = TRUE)

# 19 was the best number of PCs achieving the lowest MSE / highest proportion of successful assignment 


# now lets see what the a.score has to say
optim.a.score(dapc_full, smart = FALSE )
#optim a score says 18

# we will use the lowest value in the final DAPC to avoid overfitting, here 18 pcs

Cross validation using 50 replicates of 90%:10% training:test datasets found 21 PCs to achieve the lowest error rate, successfully assigning 79% of samples in the test set to the correct source population. The best number of pcas according to the a.score procedure was 18. We will use the lower value from these two tests to conservatively avoid overfitting.

Results

Below we show the results of the DAPC with 18 PCs

DAPC Figure

dapc_18 <- dapc(genind_2.0, n.pca = 18, n.da =1)


plot_data <- as.data.frame(dapc_18$ind.coord)
plot_data$pop <- as.character(genind_2.0$pop)

ggplot(data=plot_data)+geom_density(aes(x=LD1, color = pop, fill = pop), alpha = 0.5)+theme_classic()+scale_color_viridis_d(name = "North or South Fork")+scale_fill_viridis_d(name = "North or South Fork")

The density plot above demonstrates that using 18 principal components derived from 348 genetic markers we can identify subtle structure between North and South Fork samples. Note that while there is a difference in the mean value of the discriminant axis for each population, there is substantial overlap. This incomplete discrimination suggests subtle structure.

Variable Loadings
Which markers contribute to this structure and are they enriched for a particular annotation?

The plot and table below shows variable loadings (contribution to the first discriminant axis for each allele).

#get variable loadings
marker_loadings1 <- loadingplot(dapc_18$var.contr, axis=1,thres=.007, lab.jitter=1, main = "loading plot for DA 1", )

markers1 <- unique(substr(names(marker_loadings1$var.values),1,nchar(names(marker_loadings1$var.values))-2))

#get marker annotations

marker_mapping <- readxl::read_xlsx("metadata/final_mapping_results.xlsx", sheet = 1)

marker_mapping %<>%
  mutate(marker = str_replace(marker, "Omy(\\d+)", "Chr\\1")) %>% #marker name convention is different
  mutate(marker = str_replace(marker, "\\.", "_")) %>% 
  mutate(neutral = if_else(str_detect(`Presumed Type`, 'Adaptive|adaptive'), "adaptive", "neutral"))

mls <- as.data.frame(marker_loadings1$var.values) %>%
  rownames_to_column(var = "marker") %>%
  mutate(marker = str_sub(marker, 0, nchar(marker) -2)) %>%
  distinct(marker, .keep_all= TRUE) %>%
  rename("loading_value" = "marker_loadings1$var.values") %>%
  mutate(loading_value = loading_value*2) %>%
  left_join(select(marker_mapping, marker, `Presumed Type`, chr, start)) %>%
  arrange(desc(loading_value)) %>%
  rename("Chromosome" = "chr", "Marker Position" = "start", "Annotation" = "Presumed Type", "Marker" = "marker", "Variable Loading" = "loading_value")
kable(mls)
Marker Variable Loading Annotation Chromosome Marker Position
Omy_RAD24287-74 0.0350260 Adaptive. Basin-wide, top-outlier 8 28035667
Omy_star-206 0.0284351 Neutral 6 36624834
Omy_aromat-280 0.0265690 Neutral 4 3391425
Omy_cd59-206 0.0239317 Neutral 26 8028280
OMS00003 0.0235605 Neutral 1 59464291
Omy_96222-125 0.0228288 Neutral 15 24041060
Omy_RAD62596-38 0.0223117 Neutral 7 49977729
Omy_128996-481 0.0218681 Neutral 18 30802061
Omy_117540-259 0.0192377 Neutral 12 5079321
Omy_101832-195 0.0181796 Neutral 17 17015627
Omy_RAD29700-18 0.0171439 Neutral 20 1673132
OMS00179 0.0159952 Neutral 8 25539866
Omy_RAD7016-31 0.0156525 Adaptive. Basin-wide, top-outlier 6 51490729
Omy_RAD59758-41 0.0152490 Adaptive. Minimum annual precipitation. Basin-wide, precipitation-related; 25 40219318
Omy_metA-161 0.0145569 Neutral 1 24257279
Omy_RAD58213-70 0.0144817 Neutral 17 58266181
Omy_RAD43694-41 0.0141444 Adaptive. Basin-wide, top-outlier 17 57728473

These results suggest that the subtle structure we observe among samples is driven a mix of neutral and putatively adaptive genetic markers spread throughout the genome. The markers above represent the top 17 markers that load onto this disciminant axis and explain 70% of the variation captured by it.

Genetic Differentiation

Next we will estimate the level of genetic differentiation between North and South Fork samples using FST.

Let’s calculate some basic F-statistics

fstat <- genind2hierfstat(genind_2.0)
colnames(fstat) <- c(pop, names(genind_2.0$loc.n.all))

basicstats <- basic.stats(fstat)
kable(basicstats$overall, caption = "Basic F-statistics of Dataset")
Basic F-statistics of Dataset
x
Ho 0.2948
Hs 0.3026
Ht 0.3035
Dst 0.0009
Htp 0.3043
Dstp 0.0017
Fst 0.0029
Fstp 0.0057
Fis 0.0259
Dest 0.0025

For FST let’s also estimate using the Weir and Cockerham method.

genet.dist(fstat, method="WC84")
##             NFCQ
## SFCQ 0.005713724

The estimated FST between North and South Fork samples is very small at 0.0057.

Run-Timing Markers

Given the low overall FST, absence of structure in the dataset and the observation that highly discriminatory markers do not include run-timing markers, it is unlikely that there is any interesting pattern at the markers in the data. Let’s check just to make sure.

Below we make a heatmap of allele frequencies. Markers are arranged in genomic order.

run_timing_loci_names <- marker_mapping %>%
  filter(chr == "28" | CRITFC_chromosome == "28") %>%
  filter(str_detect(`Presumed Type`, 'run|Run')) %>%
  select(marker)

#different naming convention, lets fix
run_timing_loci_names <- str_replace(run_timing_loci_names$marker, "Omy28", "Chr28")

#
all_counts <- allele.dist(genind_2.0, mk.figures = FALSE)$count

#make into a dataframe
all_counts <- as.data.frame(do.call(rbind, all_counts))
colnames(all_counts) <- c("NF_count", "SF_count")
all_counts$sum <- rowSums(all_counts, na.rm = TRUE)

all_freqs <- allele.dist(genind_2.0, mk.figures = FALSE)$frequency
#make into a dataframe
all_freqs <- as.data.frame(do.call(rbind, all_freqs))

all_freqs <- as.data.frame(cbind(all_freqs, all_counts))

##### get only minor allele
all_freqs$marker <- genind_2.0$loc.fac

#now group by marker and keep the minor allele, then convert counts to 
all_maf <- all_freqs %>%
  group_by(marker) %>%
  slice_min(sum) %>%
  replace(., is.na(.), 0)

run_timing_maf <- all_maf %>%
  filter(marker %in% run_timing_loci_names) %>%
  left_join(select(marker_mapping, marker, CRITFC_SNP_pos_genome)) %>%
  arrange(CRITFC_SNP_pos_genome, .keep_all=TRUE)

#manually enter the position for one of these
run_timing_maf[13,7] <- "11702210"
run_timing_maf %<>%
  arrange(CRITFC_SNP_pos_genome, .keep_all=TRUE)

#definition of minor allele frequency broke for fixed marker, let's reset to 0
run_timing_maf[6,1:5] <- list(0,0,0,0,0)

tmat <- t(as.matrix(run_timing_maf[,1:2]))
colnames(tmat) <- run_timing_maf$marker
pheatmap::pheatmap(tmat, show_colnames  = T, cluster_cols = FALSE, main = "Minor Allele Frequency of Run-Timing Markers", cluster_rows = FALSE)

Only very subtle differences in allele frequency between North and South Fork steelhead samples at run timing markers.

Sometimes it is useful to look at patterns among individuals instead of allele frequency.

Below we plot the genotypes of all samples at run-timing markers. Markers are arranged in genomic order. Rows (individuals) are hierarchically clustered within each population. Allele is displayed by color (overall minor allele homozygote = BLUE, heterozygote = yellow, major allele homozygote = red)

#use this to set minor allele
#colSums(genind_2.0[loc=run_timing_loci_names]$tab, na.rm = TRUE)

sep_genind28 <- seppop(genind_2.0[loc=run_timing_loci_names])

polarized_allele_counts <- as.data.frame(sep_genind28$NFCQ$tab[,c(1,3,5,7,9,11,13,15,17,19,20,22,24)]) %>%
  bind_rows(as.data.frame(sep_genind28$SFCQ$tab[,c(1,3,5,7,9,11,13,15,17,19,20,22,24)]))

colnames(polarized_allele_counts) <- str_sub(colnames(polarized_allele_counts), 1, nchar(colnames(polarized_allele_counts))-2)

polarized_allele_counts %<>%
  rownames_to_column(var = "sample") %>%
  mutate(pop = str_sub(sample, 8,11)) %>%
  relocate(run_timing_maf$marker) #reorder to reflect genome order
  

pac_nf <- polarized_allele_counts %>%
  filter(pop == "NFCQ")
pac_sf <- polarized_allele_counts %>%
  filter(pop == "SFCQ")
nf_heat <- pheatmap::pheatmap(pac_nf[,-c(14,15)], cluster_cols  = FALSE, show_rownames = FALSE, main = "North Fork Major Allele Count")

sf_heat <- pheatmap::pheatmap(pac_sf[,-c(14,15)], cluster_cols  = FALSE, show_rownames = FALSE, main = "South Fork Major Allele Count")

At run timing markers there appear to be two major clusters. These clusters were driven by the first three SNPs in the genomic region (Chr28_11607954, Omy_RAD52456-17 and Omy_GREB1_05). While all SNPs annotated as run-timing marker SNPs have demonstrated an association with this phenotype in some populations, in the nearby Rogue River, we have demonstrated that two of these SNPs are not diagnostic of run timing. The third (Omy_RAD52456-17) was not analyzed in that study. This suggests that genomic variation within our North and South Fork Coquille steelhead samples between these two clusters is unlikely to have a large phenotype effect. Similarly, there is high genetic diversity (heterozygosity) at a marker at the 3’ border of this region (Chr28_11773194), but this marker is not diagnostic of run timing in the Rogue River.

Ignoring the first three and last marker, we also observed some genetic variation at other markers known to have a strong association with run-timing in the nearby Rogue River. Some individuals demonstrate a large number of minor alleles compared to our expectations given that all individuals are winter run.
In both the NF and SF Coquille, a few individuals were heterozygous at many adult run timing markers. This result suggests that these individuals have one copy of the “early” run allele. In the NF Coquille, two individuals were homozygous at one run timing marker suggesting that they have two copies of the “early” run allele. While we can not predict the phenotypic effect of this variation, it suggests Coquille winter run steelhead harbor some early-migration associated alleles.

Below we gather the metadata for individuals with many copies of proposed “early” alleles.

#rowSums(pac_nf[,4:12]) < 15
#pac_nf[rowSums(pac_nf[,4:12], na.rm = TRUE) < 15,14]

min_allele <- pac_nf %>%
  bind_rows(pac_sf) %>%
  select(-c(pop,"Chr28_11607954",  "Omy_RAD52458-17", "Omy_GREB1_05", "Chr28_11773194" )) %>% 
  rowwise(sample) %>%
  mutate(min_allele_ct = sum(c_across(cols = everything())=="1", na.rm = TRUE) + 2*sum(c_across(cols = everything())=="0", na.rm = TRUE)) %>%
  select(sample, min_allele_ct) %>%
  filter(min_allele_ct > 3) %>%
  left_join(meta_data, by = c("sample" = "SFGL Id"))

#ggplot(data = min_allele)+geom_histogram(aes(x = min_allele_ct))

kable(min_allele, caption = "metadata from samples with more than 3 minor alleles")
metadata from samples with more than 3 minor alleles
sample min_allele_ct Date Vial # Hat/Wild Adult/Juv Sex Est. Age Pedigree pop
OmyJC16NFCQ_0014 5 2016-07-11 44 StW 014 Wild Juvenile NA YoY OmyJC16NFCQ NFCQ
OmyJC16NFCQ_0019 8 2016-07-13 44 StW 019 Wild Juvenile NA 1+ OmyJC16NFCQ NFCQ
OmyJC16NFCQ_0045 4 2016-03-30 44 StW 045 Hatchery Juvenile NA 1 OmyJC16NFCQ NFCQ
OmyAC16SFCQ_0002 7 2016-02-17 144 StW 002 Hatchery Adult F NA OmyAC16SFCQ SFCQ
OmyJC16SFCQ_0029 4 2016-03-30 144 StW 029 Hatchery Juvenile NA 1 OmyJC16SFCQ SFCQ
OmyJC16SFCQ_0031 4 2016-07-12 144 StW 031 Wild Juvenile NA 1+ OmyJC16SFCQ SFCQ

Summary

Differentiation We examined genetic variation at 348 genetic markers among 34 North Fork and 37 South Fork Coquille River Steelhead. Overall FST was very low and PCA did not reveal any clear structure to the data. Using a discriminant analysis of principal components, we observed very subtle structure driven by a mix of neutral and putatively adaptive markers. The genetic structure observed at these markers was insufficient to fully discriminate between the two groups, however cross validation suggests that individuals could be successful assigned to their source population using genetic information with 79% accuracy.
These results suggest there are no strong genetic differences between our North and South Fork samples. However, our GTseq panel only provides a small snapshot of the genetic variation within and among these groups and other potentially ecologically relevant genetic differences between these groups may exist.

Run timing markers There was limited variation among run-timing associated markers within or between our samples. Four markers showed higher genetic diversity than the others. These markers flank the genomic region associated with run-timing and three of the four have been observed to have low correlation with run-timing phenotypes in the nearby Rogue River.
We also observed some genetic variation at other markers in the genomic region known to have an association with run-timing in the nearby Rogue River. A small number individuals carry many minor alleles compared to our expectations given that all individuals are winter run. This suggests early-run alleles may be found among Coquille River winter steelhead, but the phenotypic impact of these alleles is not known.